From f821102450a157716034bb22b45d1f8720f091f6 Mon Sep 17 00:00:00 2001
From: Keir Fraser
Date: Mon, 26 Oct 2009 13:33:38 +0000
Subject: [PATCH] x86: IRQ Migration logic enhancement.

To program an MSI's address/vector safely, delay the IRQ migration
operation until the next interrupt is acked. This should avoid
inconsistent interrupt generation caused by the non-atomic writes to
the MSI address and data registers.

irq_set_affinity() now only records the requested mask in
desc->pending_mask and sets IRQ_MOVE_PENDING. The ack handlers call
move_native_irq(), which disables the IRQ, applies the pending mask via
move_masked_irq() and re-enables it. For level-triggered IO-APIC IRQs
the move is only attempted once io_apic_level_ack_pending() reports
that no remote IRR bit is set. write_msi_msg() no longer masks and
unmasks the IRQ itself.

io_apic_level_ack_pending() skips entries whose pin is -1 and carries
on along the pin chain; it must not restart the loop on the same entry,
as that would spin forever with ioapic_lock held.

Port the logic from Linux and tailor it for Xen.

Signed-off-by: Xiantao Zhang
---
 xen/arch/x86/hpet.c           |  1 +
 xen/arch/x86/hvm/hvm.c        |  2 +-
 xen/arch/x86/io_apic.c        | 42 ++++++++++++++++++++++++
 xen/arch/x86/irq.c            | 61 +++++++++++++++++++++++++++++++++++
 xen/arch/x86/msi.c            |  4 ---
 xen/include/asm-x86/io_apic.h |  9 ++++++
 xen/include/asm-x86/irq.h     |  6 ++++
 xen/include/xen/irq.h         |  2 ++
 8 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/xen/arch/x86/hpet.c b/xen/arch/x86/hpet.c
index 1090f2dd59..f2ba93a883 100644
--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -289,6 +289,7 @@ static void hpet_msi_ack(unsigned int irq)
     struct irq_desc *desc = irq_to_desc(irq);
 
     irq_complete_move(&desc);
+    move_native_irq(irq);
     ack_APIC_irq();
 }
 
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 8a14a8e13e..60fd0a5635 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -243,7 +243,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
             continue;
         irq = desc - irq_desc;
         ASSERT(MSI_IRQ(irq));
-        desc->handler->set_affinity(irq, *cpumask_of(v->processor));
+        irq_set_affinity(irq, *cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
     spin_unlock(&d->event_lock);
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index 265e40fc33..4337484fdb 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1379,6 +1379,7 @@ static void ack_edge_ioapic_irq(unsigned int irq)
     struct irq_desc *desc = irq_to_desc(irq);
 
     irq_complete_move(&desc);
+    move_native_irq(irq);
 
     if ((desc->status & (IRQ_PENDING | IRQ_DISABLED))
         == (IRQ_PENDING | IRQ_DISABLED))
@@ -1419,6 +1420,38 @@ static void setup_ioapic_ack(char *s)
 }
 custom_param("ioapic_ack", setup_ioapic_ack);
 
+static bool_t io_apic_level_ack_pending(unsigned int irq)
+{
+    struct irq_pin_list *entry;
+    unsigned long flags;
+
+    spin_lock_irqsave(&ioapic_lock, flags);
+    entry = &irq_2_pin[irq];
+    for (;;) {
+        unsigned int reg;
+        int pin;
+
+        if (!entry)
+            break;
+
+        pin = entry->pin;
+        if (pin != -1) { /* pin == -1: nothing to read, but keep walking */
+            reg = io_apic_read(entry->apic, 0x10 + pin*2);
+            /* Is the remote IRR bit set? */
+            if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+                spin_unlock_irqrestore(&ioapic_lock, flags);
+                return 1;
+            }
+        }
+        if (!entry->next)
+            break;
+        entry = irq_2_pin + entry->next;
+    }
+    spin_unlock_irqrestore(&ioapic_lock, flags);
+
+    return 0;
+}
+
 static void mask_and_ack_level_ioapic_irq (unsigned int irq)
 {
     unsigned long v;
@@ -1456,6 +1489,10 @@ static void mask_and_ack_level_ioapic_irq (unsigned int irq)
     v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
     ack_APIC_irq();
+
+    if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
+        !io_apic_level_ack_pending(irq))
+        move_native_irq(irq);
 
     if (!(v & (1 << (i & 0x1f)))) {
         atomic_inc(&irq_mis_count);
@@ -1503,6 +1540,10 @@ static void end_level_ioapic_irq (unsigned int irq)
 
     ack_APIC_irq();
 
+    if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
+        !io_apic_level_ack_pending(irq))
+        move_native_irq(irq);
+
     if (!(v & (1 << (i & 0x1f)))) {
         atomic_inc(&irq_mis_count);
         spin_lock(&ioapic_lock);
@@ -1564,6 +1605,7 @@ static void ack_msi_irq(unsigned int irq)
     struct irq_desc *desc = irq_to_desc(irq);
 
     irq_complete_move(&desc);
+    move_native_irq(irq);
 
     if ( msi_maskable_irq(desc->msi_desc) )
         ack_APIC_irq(); /* ACKTYPE_NONE */
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index 74d096f462..6e566ab9d9 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -455,6 +455,67 @@ void __setup_vector_irq(int cpu)
     }
 }
 
+void move_masked_irq(int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+        return;
+
+    desc->status &= ~IRQ_MOVE_PENDING;
+
+    if (unlikely(cpus_empty(desc->pending_mask)))
+        return;
+
+    if (!desc->handler->set_affinity)
+        return;
+
+    /*
+     * If there was a valid mask to work with, please
+     * do the disable, re-program, enable sequence.
+     * This is *not* particularly important for level triggered
+     * but in an edge trigger case, we might be setting rte
+     * when an active trigger is coming in. This could
+     * cause some ioapics to malfunction.
+     * Being paranoid I guess!
+     *
+     * For correct operation this depends on the caller
+     * masking the irqs.
+     */
+    if (likely(cpus_intersects(desc->pending_mask, cpu_online_map)))
+        desc->handler->set_affinity(irq, desc->pending_mask);
+
+    cpus_clear(desc->pending_mask);
+}
+
+void move_native_irq(int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+        return;
+
+    if (unlikely(desc->status & IRQ_DISABLED))
+        return;
+
+    desc->handler->disable(irq);
+    move_masked_irq(irq);
+    desc->handler->enable(irq);
+}
+
+/* Record a pending affinity change for irq; applied by move_native_irq(). */
+void irq_set_affinity(int irq, cpumask_t mask)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    if (!desc->handler->set_affinity)
+        return;
+
+    ASSERT(spin_is_locked(&desc->lock));
+    desc->status |= IRQ_MOVE_PENDING;
+    cpus_copy(desc->pending_mask, mask);
+}
+
 asmlinkage void do_IRQ(struct cpu_user_regs *regs)
 {
     struct irqaction *action;
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 6bf4e6ef69..261da58185 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -231,7 +231,6 @@ static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
         u8 slot = PCI_SLOT(dev->devfn);
         u8 func = PCI_FUNC(dev->devfn);
 
-        mask_msi_irq(entry->irq);
         pci_conf_write32(bus, slot, func, msi_lower_address_reg(pos),
                          msg->address_lo);
         if ( entry->msi_attrib.is_64 )
@@ -244,7 +243,6 @@ static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
         else
             pci_conf_write16(bus, slot, func, msi_data_reg(pos, 0),
                              msg->data);
-        unmask_msi_irq(entry->irq);
         break;
     }
     case PCI_CAP_ID_MSIX:
@@ -252,13 +250,11 @@ static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
         void __iomem *base;
         base = entry->mask_base;
 
-        mask_msi_irq(entry->irq);
         writel(msg->address_lo,
             base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         writel(msg->address_hi,
             base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
         writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
-        unmask_msi_irq(entry->irq);
         break;
     }
     default:
diff --git a/xen/include/asm-x86/io_apic.h b/xen/include/asm-x86/io_apic.h
index e064d4a7d5..6781ac1bac 100644
--- a/xen/include/asm-x86/io_apic.h
+++ b/xen/include/asm-x86/io_apic.h
@@ -22,6 +22,15 @@
 
 #define IO_APIC_ID(idx) (mp_ioapics[idx].mpc_apicid)
 
+/* I/O Unit Redirection Table */
+#define IO_APIC_REDIR_VECTOR_MASK   0x000FF
+#define IO_APIC_REDIR_DEST_LOGICAL  0x00800
+#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000
+#define IO_APIC_REDIR_SEND_PENDING  (1 << 12)
+#define IO_APIC_REDIR_REMOTE_IRR    (1 << 14)
+#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
+#define IO_APIC_REDIR_MASKED        (1 << 16)
+
 /*
  * The structure of the IO-APIC:
  */
diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h
index c4fbaeb58d..90f5fd26f3 100644
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -138,6 +138,12 @@ int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
 
 int bind_irq_vector(int irq, int vector, cpumask_t domain);
 
+void move_native_irq(int irq);
+
+void move_masked_irq(int irq);
+
+void irq_set_affinity(int irq, cpumask_t mask);
+
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
 
diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h
index bf81b6b8db..23565ae2f3 100644
--- a/xen/include/xen/irq.h
+++ b/xen/include/xen/irq.h
@@ -24,6 +24,7 @@ struct irqaction {
 #define IRQ_REPLAY	8	/* IRQ has been replayed but not acked yet */
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
 #define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
+#define IRQ_MOVE_PENDING      64  /* IRQ is migrating to another CPU */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */
 
 /* Special IRQ numbers. */
@@ -75,6 +76,7 @@ typedef struct irq_desc {
     int irq;
     spinlock_t lock;
     cpumask_t affinity;
+    cpumask_t pending_mask;  /* IRQ migration pending mask */
 
     /* irq ratelimit */
     s_time_t rl_quantum_start;
-- 
2.30.2